library(dplyr)
library(plyr)
library(ggplot2)
`%not in%` <- function (x, table) is.na(match(x, table, nomatch=NA_integer_))

Read in all the delta psi files (10 comparisons each)

For GSE4..

muscle_liver_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_liv_dpsi/muscle_liver.deltapsi_deltapsi.tsv")
muscle_liver_4
muscle_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_brn_dpsi/muscle_brain.deltapsi_deltapsi.tsv")
muscle_heart_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_hrt_dpsi/muscle_heart.deltapsi_deltapsi.tsv")
muscle_testes_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_test_dpsi/muscle_testes.deltapsi_deltapsi.tsv")
liver_heart_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_liv_hrt_dpsi/liver_heart.deltapsi_deltapsi.tsv")
heart_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_hrt_brn_dpsi/heart_brain.deltapsi_deltapsi.tsv")
heart_testes_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_hrt_test_dpsi/heart_testes.deltapsi_deltapsi.tsv")
testes_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_test_brn_dpsi/testes_brain.deltapsi_deltapsi.tsv")
liver_testes_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_liv_test_dpsi/liver_testes.deltapsi_deltapsi.tsv")
liver_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_liv_brn_dpsi/liver_brain.deltapsi_deltapsi.tsv")
full4 <- list(muscle_liver_4,muscle_brain_4,muscle_heart_4,muscle_testes_4,liver_heart_4,heart_brain_4,heart_testes_4,testes_brain_4,liver_testes_4,liver_brain_4) %>%
    Reduce(function(dtf1,dtf2) full_join(dtf1,dtf2,by=c("LSV ID","Gene ID","#Gene Name","Junctions coords","Exons coords","strand","chr")), .)
delta_psi4 <- full4 %>%
  select(starts_with("E(dPSI)")) %>%
  setNames(c("muscle_liver","muscle_brain","muscle_heart","muscle_testes","heart_liver","heart_brain","heart_testes","testes_brain","testes_liver","brain_liver"))
final4 <- data.frame(full4$`#Gene Name`,full4$`Gene ID`,full4$`LSV ID`,full4$A5SS.x,full4$A3SS.x,full4$ES.x,full4$`Junctions coords`,full4$`Exons coords`,full4$strand,full4$chr,delta_psi4)
colnames(final4) <- c("Gene Name","Gene ID","LSV ID","A5SS","A3SS","ES","Junction Coords","Exon Coords","strand","chr","muscle_liver","muscle_brain","muscle_heart","muscle_testes","liver_heart","heart_brain","heart_testes","testes_brain","liver_testes","liver_brain")
final4

For GSE5…

muscle_liver_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_liv_dpsi/muscle_liver.deltapsi_deltapsi.tsv")
muscle_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_brn_dpsi/muscle_brain.deltapsi_deltapsi.tsv")
muscle_heart_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_hrt_dpsi/muscle_heart.deltapsi_deltapsi.tsv")
muscle_testes_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_test_dpsi/muscle_testes.deltapsi_deltapsi.tsv")
liver_heart_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_liv_hrt_dpsi/liver_heart.deltapsi_deltapsi.tsv")
heart_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_hrt_brn_dpsi/heart_brain.deltapsi_deltapsi.tsv")
heart_testes_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_hrt_test_dpsi/heart_testes.deltapsi_deltapsi.tsv") 
testes_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_test_brn_dpsi/testes_brain.deltapsi_deltapsi.tsv") 
liver_testes_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_liv_test_dpsi/liver_testes.deltapsi_deltapsi.tsv") 
liver_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_liv_brn_dpsi/liver_brain.deltapsi_deltapsi.tsv")
full5 <- list(muscle_liver_5,muscle_brain_5,muscle_heart_5,muscle_testes_5,liver_heart_5,heart_brain_5,heart_testes_5,testes_brain_5,liver_testes_5,liver_brain_5) %>%
    Reduce(function(dtf1,dtf2) full_join(dtf1,dtf2,by=c("LSV ID","Gene ID","#Gene Name","Junctions coords","Exons coords","strand","chr")), .)
delta_psi5 <- full5 %>%
  select(starts_with("E(dPSI)")) %>%
  setNames(c("muscle_liver","muscle_brain","muscle_heart","muscle_testes","liver_heart","heart_brain","heart_testes","testes_brain","liver_testes","liver_brain"))
final5 <- data.frame(full5$`#Gene Name`,full5$`Gene ID`,full5$`LSV ID`,full5$A5SS.x,full5$A3SS.x,full5$ES.x,full5$`Junctions coords`,full5$`Exons coords`,full5$strand,full5$chr,delta_psi5)
colnames(final5) <- c("Gene Name","Gene ID","LSV ID","A5SS","A3SS","ES","Junction Coords","Exon Coords","strand","chr","muscle_liver","muscle_brain","muscle_heart","muscle_testes","liver_heart","heart_brain","heart_testes","testes_brain","liver_testes","liver_brain")
final5
# Find LSVs that are present in both experiments
final_lsvs <- final5[final4$`LSV ID` %in% final5$`LSV ID`,]
final_lsvs <- unique(final_lsvs)
nrow(final_lsvs)
[1] 1662
length(unique(final_lsvs$`Gene ID`))
[1] 1154
final_lsvs
final_lsvs[is.na(final_lsvs)] <- 0
invalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generated
final4[is.na(final4)] <- 0
final5[is.na(final5)] <- 0
final_lsvs[1000:1900,]
fwrite(final_lsvs,file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/high_confidence_lsvs.tsv",sep="\t")
# Enrichment for GSE4
rat_ne_nets = read.csv("/homes/s1207699/stuff_fromeric/RAT_NEMMnetlist_nets.bed", sep="\t", header=TRUE)
rat_ne_nets_names = unique(rat_ne_nets$rat)
rat_ne_nets_names[[1]]
[1] ENSRNOG00000031033
2963 Levels: ENSRNOG00000000048 ENSRNOG00000000066 ENSRNOG00000000081 ENSRNOG00000000082 ENSRNOG00000000104 ... ENSRNOG00000040287
mgenes <- fread("/homes/s1207699/data/RAT_muscle_gse4_exprs.txt")
lgenes <- fread("/homes/s1207699/data/RAT_liver_gse4_exprs.txt")
tgenes <- fread("/homes/s1207699/data/RAT_testes_gse4_exprs.txt")
bgenes <- fread("/homes/s1207699/data/RAT_brain_gse4_exprs.txt")
hgenes <- fread("/homes/s1207699/data/RAT_heart_gse4_exprs.txt")
all_genes <- Reduce(union, list(mgenes,lgenes,tgenes,bgenes,hgenes))
all_genes <- unique(union(all_genes[[1]],unique(final4$`Gene ID`)))
# How many rat-genes are NE > MM NETs
exprs_nets <- sum(unique(all_genes) %in% unique(rat_ne_nets_names))
length(exprs_nets)
[1] 1
# How many non-NET genes expressed
exprs_non_nets <- sum(unique(all_genes) %not in% unique(rat_ne_nets_names))
# The number of spliced NETs
s_genes <- unique(final4$`Gene ID`)
changed_all_nets = s_genes[which(s_genes %in% rat_ne_nets_names)]
splice_nets <- length(unique(changed_all_nets))
# Number of spliced non-nets
yy = s_genes[which(s_genes %not in% rat_ne_nets_names)]
length(unique(yy))+length(unique(changed_all_nets)) == length(unique(final4$`Gene ID`))
[1] TRUE
nrow(changed_all_nets)
NULL
splice_non_nets <- length(unique(yy))
# Fisher's exact test 
all_splicing <-
matrix(c(splice_nets, exprs_nets-splice_nets,splice_non_nets, exprs_non_nets-splice_non_nets),
       nrow = 2,
       dimnames =
       list(c("Spliced","Not spliced (but expressed)"),
            c("NET", "Non-NET")))
all_splicing
                             NET Non-NET
Spliced                      907    2960
Not spliced (but expressed) 1767    8159
fisher.test(all_splicing, alternative = "greater")

    Fisher's Exact Test for Count Data

data:  all_splicing
p-value = 6.097e-14
alternative hypothesis: true odds ratio is greater than 1
95 percent confidence interval:
 1.309922      Inf
sample estimates:
odds ratio 
  1.414848 
# Heatmap
heatmap <- data.frame(sapply(final_lsvs[,9:18], function(x)(length(na.omit(x)))))
tissue1 <- t(data.frame(stri_split_fixed(str=rownames(heatmap),pattern="_",n=3)))
heatmap3 <- data.frame(heatmap,tissue1[,1],tissue1[,2])
# I need to reorder the rows to make the heatmap look right, not sure how to do this in an elegant way
tissueone <- c(rep("brain",4),rep("heart",3),rep("liver",2),"muscle")
tissuetwo <- c("heart","liver","muscle","testes","liver","muscle","testes","muscle","testes","testes")
tissuelsvs <- c(473,237,396,884,176,168,687,170,364,602)
heatmap2 <- data.frame(tissueone,tissuetwo,tissuelsvs,stringsAsFactors = FALSE)
# Adapting code from http://pseudofish.com/triangle-heatmaps-in-r-using-ggplot.html
all_heatmap <- ggplot(heatmap2, aes(heatmap2$tissuetwo, heatmap2$tissueone)) +
    ggtitle('Number of changing LSVs in all genes between rat tissues') +
    theme_bw() +
    xlab(NULL) +
    ylab(NULL) +
    geom_tile(aes(fill = heatmap2$tissuelsvs), color='white') +
    scale_fill_gradient(low = 'lightyellow', high = 'red', space = 'Lab') +
    geom_text(aes(label = heatmap2$tissuelsvs)) +
    theme(legend.title = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(axis.text.x=element_text(angle=90),
          axis.ticks=element_blank(),
          axis.line=element_blank(),
          panel.border=element_blank())
all_heatmap

ggplot(data = heatmap2, aes(x=heatmap2$tissueone, y=heatmap2$tissuetwo, fill=heatmap2$tissuelsvs)) + 
  geom_tile()

# Get NET LSVs so we can make NET heatmap
net_lsvs <- final_lsvs[final_lsvs$`Gene ID` %in% rat_ne_nets_names,]
length(unique(net_lsvs$`Gene Name`))
[1] 327
# NET Heatmap
NETheatmap <- data.frame(sapply(net_lsvs[,9:18], function(x)(length(na.omit(x)))))
tissue1 <- t(data.frame(stri_split_fixed(str=rownames(NETheatmap),pattern="_",n=3)))
heatmap4 <- data.frame(NETheatmap,tissue1[,1],tissue1[,2])
heatmap4
# I need to reorder the rows to make the heatmap look right, not sure how to do this in an elegant way
tissueone <- c(rep("brain",4),rep("heart",3),rep("liver",2),"muscle")
tissuetwo <- c("heart","liver","muscle","testes","liver","muscle","testes","muscle","testes","testes")
tissuelsvs <- c(149,56,127,276,47,50,212,45,110,180)
netheatmap2 <- data.frame(tissueone,tissuetwo,tissuelsvs,stringsAsFactors = FALSE)
netheatmapfinal <- data.frame(netheatmap2$tissueone, netheatmap2$tissuetwo,round(netheatmap2$tissuelsvs/heatmap2$tissuelsvs,2))
netheatmapfinal
# Adapting code from http://pseudofish.com/triangle-heatmaps-in-r-using-ggplot.html
all_heatmap <- ggplot(netheatmapfinal, aes(netheatmapfinal$netheatmap2.tissuetwo, netheatmapfinal$netheatmap2.tissueone)) +
    ggtitle('Number of changing LSVs in all genes between rat tissues') +
    theme_bw() +
    xlab(NULL) +
    ylab(NULL) +
    geom_tile(aes(fill = netheatmapfinal[[3]]), color='white') +
    scale_fill_gradient(low = 'lightyellow', high = 'red', space = 'Lab') +
    geom_text(aes(label = netheatmapfinal[[3]])) +
    theme(legend.title = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(axis.text.x=element_text(angle=90),
          axis.ticks=element_blank(),
          axis.line=element_blank(),
          panel.border=element_blank())
all_heatmap

final_lsvs %>% filter(is.na(`Junction Coords`))
final4 %>% filter(is.na(`Junction Coords`))
final_lsvs
fwrite(final_lsvs, file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/final_lsvs.tsv")
fwrite(final4, file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/final_lsvs_4.tsv")
fwrite(final5, file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/final_lsvs_5.tsv")
# Go analysis
# Genes in LSVs that were found for both GSE5 and GSE4
genes <- paste0(unique(final_lsvs$`Gene Name`))
filEConn<-file("/homes/s1207699/data/rna/rat_merge_genes_0.2")
writeLines(genes, filEConn)
close(filEConn)
length(genes) #1154 genes identified as the same between gse4 and gse5
[1] 1154
go <- fread("/homes/s1207699/rat_merge_all_GO_results")
go$ANNOTATED_GENES <- strsplit(go$ANNOTATED_GENES,", ")
hello <- vector()
for (i in 1:(nrow(go)-1)){
  y = union(go$ANNOTATED_GENES[[i]],go$ANNOTATED_GENES[[i+1]])
  hello = union(hello,y)
}
hello = unique(final_lsvs$`Gene Name`)
#hello equals all the genes included in the GO analysis
upset_counts <- data.frame(hello)
for (x in 1:nrow(go)){
  p = vector()
  for (i in 1:length(hello)){
    if(hello[i] %in% go$ANNOTATED_GENES[[x]]){
      z = 1
    }
    else{
      z = 0
    }
    p = c(p,z)
  }
  upset_counts[,x+1] = p
}
colnames(upset_counts) <- c("gene",go$TERM)
# Make upset plot
library(UpSetR)
library(data.table)
library(cowplot)
upset_counts
upset(upset_counts,nsets=10,order.by ="freq", main.bar.color = "#722c6b")

upset(upset_counts,nsets=5,order.by ="freq", main.bar.color = "#722c6b")

---
title: "rat splicing"
output:
  html_document: default
  html_notebook: default
---


```{r}
library(dplyr)
library(plyr)
library(ggplot2)
`%not in%` <- function (x, table) is.na(match(x, table, nomatch=NA_integer_))

```

Read in all the delta psi files (10 comparisons each)

For GSE4..

```{r}
muscle_liver_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_liv_dpsi/muscle_liver.deltapsi_deltapsi.tsv")
muscle_liver_4
muscle_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_brn_dpsi/muscle_brain.deltapsi_deltapsi.tsv")
muscle_heart_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_hrt_dpsi/muscle_heart.deltapsi_deltapsi.tsv")
muscle_testes_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_musc_test_dpsi/muscle_testes.deltapsi_deltapsi.tsv")
liver_heart_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_liv_hrt_dpsi/liver_heart.deltapsi_deltapsi.tsv")
heart_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_hrt_brn_dpsi/heart_brain.deltapsi_deltapsi.tsv")
heart_testes_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_hrt_test_dpsi/heart_testes.deltapsi_deltapsi.tsv")
testes_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_test_brn_dpsi/testes_brain.deltapsi_deltapsi.tsv")
liver_testes_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_liv_test_dpsi/liver_testes.deltapsi_deltapsi.tsv")
liver_brain_4 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE41637/all_comp_liv_brn_dpsi/liver_brain.deltapsi_deltapsi.tsv")

full4 <- list(muscle_liver_4,muscle_brain_4,muscle_heart_4,muscle_testes_4,liver_heart_4,heart_brain_4,heart_testes_4,testes_brain_4,liver_testes_4,liver_brain_4) %>%
    Reduce(function(dtf1,dtf2) full_join(dtf1,dtf2,by=c("LSV ID","Gene ID","#Gene Name","Junctions coords","Exons coords","strand","chr")), .)
delta_psi4 <- full4 %>%
  select(starts_with("E(dPSI)")) %>%
  setNames(c("muscle_liver","muscle_brain","muscle_heart","muscle_testes","heart_liver","heart_brain","heart_testes","testes_brain","testes_liver","brain_liver"))
final4 <- data.frame(full4$`#Gene Name`,full4$`Gene ID`,full4$`LSV ID`,full4$A5SS.x,full4$A3SS.x,full4$ES.x,full4$`Junctions coords`,full4$`Exons coords`,full4$strand,full4$chr,delta_psi4)
colnames(final4) <- c("Gene Name","Gene ID","LSV ID","A5SS","A3SS","ES","Junction Coords","Exon Coords","strand","chr","muscle_liver","muscle_brain","muscle_heart","muscle_testes","liver_heart","heart_brain","heart_testes","testes_brain","liver_testes","liver_brain")

final4
```

For GSE5...
```{r}
muscle_liver_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_liv_dpsi/muscle_liver.deltapsi_deltapsi.tsv")
muscle_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_brn_dpsi/muscle_brain.deltapsi_deltapsi.tsv")
muscle_heart_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_hrt_dpsi/muscle_heart.deltapsi_deltapsi.tsv")
muscle_testes_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_musc_test_dpsi/muscle_testes.deltapsi_deltapsi.tsv")
liver_heart_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_liv_hrt_dpsi/liver_heart.deltapsi_deltapsi.tsv")
heart_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_hrt_brn_dpsi/heart_brain.deltapsi_deltapsi.tsv")
heart_testes_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_hrt_test_dpsi/heart_testes.deltapsi_deltapsi.tsv") 
testes_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_test_brn_dpsi/testes_brain.deltapsi_deltapsi.tsv") 
liver_testes_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_liv_test_dpsi/liver_testes.deltapsi_deltapsi.tsv") 
liver_brain_5 <- fread("/homes/s1207699/data/rna/MAJIQ_analysis/rat/GSE53960/all_comp_liv_brn_dpsi/liver_brain.deltapsi_deltapsi.tsv")

full5 <- list(muscle_liver_5,muscle_brain_5,muscle_heart_5,muscle_testes_5,liver_heart_5,heart_brain_5,heart_testes_5,testes_brain_5,liver_testes_5,liver_brain_5) %>%
    Reduce(function(dtf1,dtf2) full_join(dtf1,dtf2,by=c("LSV ID","Gene ID","#Gene Name","Junctions coords","Exons coords","strand","chr")), .)
delta_psi5 <- full5 %>%
  select(starts_with("E(dPSI)")) %>%
  setNames(c("muscle_liver","muscle_brain","muscle_heart","muscle_testes","liver_heart","heart_brain","heart_testes","testes_brain","liver_testes","liver_brain"))
final5 <- data.frame(full5$`#Gene Name`,full5$`Gene ID`,full5$`LSV ID`,full5$A5SS.x,full5$A3SS.x,full5$ES.x,full5$`Junctions coords`,full5$`Exons coords`,full5$strand,full5$chr,delta_psi5)

colnames(final5) <- c("Gene Name","Gene ID","LSV ID","A5SS","A3SS","ES","Junction Coords","Exon Coords","strand","chr","muscle_liver","muscle_brain","muscle_heart","muscle_testes","liver_heart","heart_brain","heart_testes","testes_brain","liver_testes","liver_brain")

final5


```

```{r}
# Find LSVs that are present in both experiments
final_lsvs <- final5[final4$`LSV ID` %in% final5$`LSV ID`,]
final_lsvs <- unique(final_lsvs)
nrow(final_lsvs)
length(unique(final_lsvs$`Gene ID`))
final_lsvs
final_lsvs[is.na(final_lsvs)] <- 0
final4[is.na(final4)] <- 0
final5[is.na(final5)] <- 0
final_lsvs[1000:1900,]
fwrite(final_lsvs,file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/high_confidence_lsvs.tsv",sep="\t")
```



```{r}
# Enrichment for GSE4
rat_ne_nets = read.csv("/homes/s1207699/stuff_fromeric/RAT_NEMMnetlist_nets.bed", sep="\t", header=TRUE)
rat_ne_nets_names = unique(rat_ne_nets$rat)
rat_ne_nets_names[[1]]
mgenes <- fread("/homes/s1207699/data/RAT_muscle_gse4_exprs.txt")
lgenes <- fread("/homes/s1207699/data/RAT_liver_gse4_exprs.txt")
tgenes <- fread("/homes/s1207699/data/RAT_testes_gse4_exprs.txt")
bgenes <- fread("/homes/s1207699/data/RAT_brain_gse4_exprs.txt")
hgenes <- fread("/homes/s1207699/data/RAT_heart_gse4_exprs.txt")
all_genes <- Reduce(union, list(mgenes,lgenes,tgenes,bgenes,hgenes))
all_genes <- unique(union(all_genes[[1]],unique(final4$`Gene ID`)))


# How many rat-genes are NE > MM NETs
exprs_nets <- sum(unique(all_genes) %in% unique(rat_ne_nets_names))
length(exprs_nets)
# How many non-NET genes expressed
exprs_non_nets <- sum(unique(all_genes) %not in% unique(rat_ne_nets_names))

# The number of spliced NETs
s_genes <- unique(final4$`Gene ID`)
changed_all_nets = s_genes[which(s_genes %in% rat_ne_nets_names)]
splice_nets <- length(unique(changed_all_nets))

# Number of spliced non-nets
yy = s_genes[which(s_genes %not in% rat_ne_nets_names)]
length(unique(yy))+length(unique(changed_all_nets)) == length(unique(final4$`Gene ID`))

nrow(changed_all_nets)
splice_non_nets <- length(unique(yy))

```
```{r}
# Fisher's exact test 
all_splicing <-
matrix(c(splice_nets, exprs_nets-splice_nets,splice_non_nets, exprs_non_nets-splice_non_nets),
       nrow = 2,
       dimnames =
       list(c("Spliced","Not spliced (but expressed)"),
            c("NET", "Non-NET")))
all_splicing
fisher.test(all_splicing, alternative = "greater")
```

```{r}
# Heatmap
heatmap <- data.frame(sapply(final_lsvs[,9:18], function(x)(length(na.omit(x)))))
tissue1 <- t(data.frame(stri_split_fixed(str=rownames(heatmap),pattern="_",n=3)))
heatmap3 <- data.frame(heatmap,tissue1[,1],tissue1[,2])

# I need to reorder the rows to make the heatmap look right, not sure how to do this in an elegant way
tissueone <- c(rep("brain",4),rep("heart",3),rep("liver",2),"muscle")
tissuetwo <- c("heart","liver","muscle","testes","liver","muscle","testes","muscle","testes","testes")
tissuelsvs <- c(473,237,396,884,176,168,687,170,364,602)
heatmap2 <- data.frame(tissueone,tissuetwo,tissuelsvs,stringsAsFactors = FALSE)


# Adapting code from http://pseudofish.com/triangle-heatmaps-in-r-using-ggplot.html
all_heatmap <- ggplot(heatmap2, aes(heatmap2$tissuetwo, heatmap2$tissueone)) +
    ggtitle('Number of changing LSVs in all genes between rat tissues') +
    theme_bw() +
    xlab(NULL) +
    ylab(NULL) +
    geom_tile(aes(fill = heatmap2$tissuelsvs), color='white') +
    scale_fill_gradient(low = 'lightyellow', high = 'red', space = 'Lab') +
    geom_text(aes(label = heatmap2$tissuelsvs)) +
    theme(legend.title = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(axis.text.x=element_text(angle=90),
          axis.ticks=element_blank(),
          axis.line=element_blank(),
          panel.border=element_blank())
all_heatmap
ggplot(data = heatmap2, aes(x=heatmap2$tissueone, y=heatmap2$tissuetwo, fill=heatmap2$tissuelsvs)) + 
  geom_tile()
```

```{r}
# Get NET LSVs so we can make NET heatmap

net_lsvs <- final_lsvs[final_lsvs$`Gene ID` %in% rat_ne_nets_names,]
length(unique(net_lsvs$`Gene Name`))

# NET Heatmap
NETheatmap <- data.frame(sapply(net_lsvs[,9:18], function(x)(length(na.omit(x)))))
tissue1 <- t(data.frame(stri_split_fixed(str=rownames(NETheatmap),pattern="_",n=3)))
heatmap4 <- data.frame(NETheatmap,tissue1[,1],tissue1[,2])
heatmap4

# I need to reorder the rows to make the heatmap look right, not sure how to do this in an elegant way
tissueone <- c(rep("brain",4),rep("heart",3),rep("liver",2),"muscle")
tissuetwo <- c("heart","liver","muscle","testes","liver","muscle","testes","muscle","testes","testes")
tissuelsvs <- c(149,56,127,276,47,50,212,45,110,180)
netheatmap2 <- data.frame(tissueone,tissuetwo,tissuelsvs,stringsAsFactors = FALSE)
netheatmapfinal <- data.frame(netheatmap2$tissueone, netheatmap2$tissuetwo,round(netheatmap2$tissuelsvs/heatmap2$tissuelsvs,2))
netheatmapfinal

# Adapting code from http://pseudofish.com/triangle-heatmaps-in-r-using-ggplot.html
all_heatmap <- ggplot(netheatmapfinal, aes(netheatmapfinal$netheatmap2.tissuetwo, netheatmapfinal$netheatmap2.tissueone)) +
    ggtitle('Number of changing LSVs in all genes between rat tissues') +
    theme_bw() +
    xlab(NULL) +
    ylab(NULL) +
    geom_tile(aes(fill = netheatmapfinal[[3]]), color='white') +
    scale_fill_gradient(low = 'lightyellow', high = 'red', space = 'Lab') +
    geom_text(aes(label = netheatmapfinal[[3]])) +
    theme(legend.title = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(axis.text.x=element_text(angle=90),
          axis.ticks=element_blank(),
          axis.line=element_blank(),
          panel.border=element_blank())
all_heatmap

```

```{r}
final_lsvs %>% filter(is.na(`Junction Coords`))
final4 %>% filter(is.na(`Junction Coords`))
final_lsvs
fwrite(final_lsvs, file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/final_lsvs.tsv")
fwrite(final4, file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/final_lsvs_4.tsv")
fwrite(final5, file="/homes/s1207699/data/rna/MAJIQ_analysis/rat/final_lsvs_5.tsv")

```

```{r}
# Go analysis
# Genes in LSVs that were found for both GSE5 and GSE4
genes <- paste0(unique(final_lsvs$`Gene Name`))
filEConn<-file("/homes/s1207699/data/rna/rat_merge_genes_0.2")
writeLines(genes, filEConn)
close(filEConn)
length(genes) #1154 genes identified as the same between gse4 and gse5


go <- fread("/homes/s1207699/rat_merge_all_GO_results")
go$ANNOTATED_GENES <- strsplit(go$ANNOTATED_GENES,", ")
hello <- vector()
for (i in 1:(nrow(go)-1)){
  y = union(go$ANNOTATED_GENES[[i]],go$ANNOTATED_GENES[[i+1]])
  hello = union(hello,y)
}

hello = unique(final_lsvs$`Gene Name`)
#hello equals all the genes included in the GO analysis
upset_counts <- data.frame(hello)
for (x in 1:nrow(go)){
  p = vector()
  for (i in 1:length(hello)){
    if(hello[i] %in% go$ANNOTATED_GENES[[x]]){
      z = 1
    }
    else{
      z = 0
    }
    p = c(p,z)
  }
  upset_counts[,x+1] = p
}
colnames(upset_counts) <- c("gene",go$TERM)

# Make upset plot
library(UpSetR)
library(data.table)
library(cowplot)
upset_counts
upset(upset_counts,nsets=10,order.by ="freq", main.bar.color = "#722c6b")
upset(upset_counts,nsets=5,order.by ="freq", main.bar.color = "#722c6b")
```

